// Merge first-name gender data with inventor data


//step 1. import gender data
//
// Reads the first-name gender table, derives a 0/1 female flag from
// percent_female, and saves two lookup files: one with the names as imported
// and one with the names upper-cased (the upper-cased file is the one merged
// onto the inventor data in step 2).

import delimited "${rawdata}all_gender_name.csv", clear

// female = 1 when percent_female is above .9, 0 when it is below .1; names in
// between are left missing.
// Stata orders missing values above every number, so "percent_female > .9"
// alone is also true when percent_female is missing. Exclude missings
// explicitly so those names stay uncoded instead of being flagged female.
gen female=1 if percent_female > .9 & !missing(percent_female)
replace female=0 if percent_female < .1
rename name inventor_name_first
save "${filedata}all_gender_name.dta", replace

// Upper-cased copy of the lookup, matching the upper-cased first names used
// on the inventor side of the merge.
replace inventor_name_first = upper(inventor_name_first)
save "${filedata}all_gender_namecaps.dta", replace


//step 2. format inventor data and merge with gender data

use "${rawdata}2014/all_inventors.dta", clear

// Normalise the first name in one pass: strip leading/trailing blanks, then
// upper-case, so it lines up with the upper-cased gender lookup key.
replace inventor_name_first = upper(trim(inventor_name_first))
// Force the key to a fixed str500 storage type (force allows truncation of
// anything longer).
recast str500 inventor_name_first, force

// Discard rows without an application number, then exact duplicate rows.
drop if application_number==""
duplicates drop

// Keep only inventors that carry some location information: a row is dropped
// when both the country code and the region code are empty.
drop if inventor_country_code=="" & inventor_region_code==""

//fix inventor region data
//
// US_state is 1 when inventor_region_code equals one of the two-letter codes
// listed below (the 50 US states plus DC) and 0 otherwise.
// NOTE(review): only the region code is checked, not the country code --
// confirm that non-US records cannot carry the same two-letter region codes.

gen US_state = 0
foreach st in AK AL AR AZ CA CO CT DC DE FL GA HI IA ID IL IN KS ///
              KY LA MA MD ME MI MN MO MS MT NC ND NE NH NJ NM NV ///
              NY OH OK OR PA RI SC SD TN TX UT VA VT WA WI WV WY {
    replace US_state = 1 if inventor_region_code == "`st'"
}

// Build an inventor key by concatenating first, middle and last name with the
// region code.
// NOTE(review): the parts are joined without a separator, so different
// name/region combinations can produce the same string, and only the first
// name has been trimmed and upper-cased above -- confirm this is acceptable
// for downstream use of inventor_id.
gen inventor_id = inventor_name_first + inventor_name_middle + inventor_name_last + inventor_region_code

// Attach the gender flag by first name. keep(master match) retains every
// inventor row (matched or not) and discards lookup names that match no
// inventor, i.e. the rows that would have _merge == 2.
merge m:1 inventor_name_first using "${filedata}all_gender_namecaps.dta", keep(master match)

// Reduce to the variables needed downstream; this also drops _merge.
keep application_number inventor_id US_state female

save "${filedata}inventor_gender.dta", replace

